#Set up

rm(list=ls())

#Set your working directory - this will be the starting folder where all other folders are created
setwd ("C:/Users/cecidy/Google Drive/ZH_analysis/")

source("ZHfunctions.R")
options(scipen=9999)

#######################################################################################################
#######################################################################################################

#Analysis for the paper:

#"Assessing multi-dimensional sustainability: lessons from Brazil's social protection programs"


#Robustness analysis

#Step 1: Create CBGPS weights
#Step 2: Run multivariate regression models, weighted by CBGPS

#######################################################################################################

##Functions sourced from ZHfunctions.R, in combination with mapply are used to carry out the analysis

#There are 24 programme-outcome combinations and thus 24 models
#Arguments needed for the functions are read from the lists below
#The lists are sourced again before each function is run
#Analysis starts around line 375

#######################################################################################################
######################################################################################################

#allListsStart

#A list of directory where the script for each model lie
#If all scripts lie in the same folder, repeat 24 times (one per programme-outcome combination)
Folderlist <- list("ProgrammeOutcome/ZH/Kcal/",
                   "ProgrammeOutcome/BF/Kcal/",
                   "ProgrammeOutcome/PRONAF/Kcal/",
                   
                   "ProgrammeOutcome/ZH/Protein/",
                   "ProgrammeOutcome/BF/Protein/",
                   "ProgrammeOutcome/PRONAF/Protein/",
                   
                   "ProgrammeOutcome/ZH/MPI00to10/",
                   "ProgrammeOutcome/BF/MPI00to10/",
                   "ProgrammeOutcome/PRONAF/MPI00to10/",
                   
                   "ProgrammeOutcome/ZH/MPI04to13/",
                   "ProgrammeOutcome/BF/MPI04to13/",
                   "ProgrammeOutcome/PRONAF/MPI04to13/",
                   
                   "ProgrammeOutcome/ZH/Childmal/",
                   "ProgrammeOutcome/BF/Childmal/",
                   "ProgrammeOutcome/PRONAF/Childmal/",
                   
                   "ProgrammeOutcome/ZH/InfMort00to10/",
                   "ProgrammeOutcome/BF/InfMort00to10/",
                   "ProgrammeOutcome/PRONAF/InfMort00to10/",
                   
                   "ProgrammeOutcome/ZH/InfMort04to13/",
                   "ProgrammeOutcome/BF/InfMort04to13/",
                   "ProgrammeOutcome/PRONAF/InfMort04to13/",
                   
                   "ProgrammeOutcome/ZH/NatVeg/",
                   "ProgrammeOutcome/BF/NatVeg/",
                   "ProgrammeOutcome/PRONAF/NatVeg/")

Scriptlist <- list("1.ZHandKcal_robustness.R",
                   "2.BFandKcal_robustness.R",
                   "3.PRONAFandKcal_robustness.R",
                   "4.ZHandProtein_robustness.R",
                   "5.BFandProtein_robustness.R",
                   "6.PRONAFandProtein_robustness.R",
                   "7.ZHandMPI00to10_robustness.R",
                   "8.BFandMPI00to10_robustness.R",
                   "9.PRONAFandMPI00to10_robustness.R",
                   "10.ZHandMPI04_robustness.R",
                   "11.BFandMPI04_robustness.R",
                   "12.PRONAFandMPI04_robustness.R",
                   "13.ZHandChildmal_robustness.R",
                   "14.BFandChildmal_robustness.R",
                   "15.PRONAFandChildmal_robustness.R",
                   "16.ZHandInfmort00to10_robustness.R",
                   "17.BFandInfmort00to10_robustness.R",
                   "18.PRONAFandInfmort00to10_robustness.R",
                   "19.ZHandInfmort04to13_robustness.R",
                   "20.BFandInfmort04to13_robustness.R",
                   "21.PRONAFandInfmort04to13_robustness.R",
                   "22.ZHandNatVeg_robustness.R",
                   "23.BFandNatVeg_robustness.R",
                   "24.PRONAFandNatVeg_robustness.R")

#Bookmarks in scripts to indicate rows from where to start to run script
#OBS in natural vegetation models also need temp1 to make the correct robustness sample
startList <- list("#start2_","#start2_","#start2_",
                  "#start2_","#start2_","#start2_",
                  "#start2_","#start2_","#start2_",
                  "#start2_","#start2_","#start2_",
                  "#temp1","#temp1","#temp1",
                  "#temp1","#temp1","#temp1",
                  "#temp1","#temp1","#temp1",
                  "#temp1","#temp1","#temp1")

#Bookmarks in scripts to indicate rows from where to stop to run script
#OBS in natural vegetation models also need temp1 to make the correct robustness sample
endList <- list("#fin2_","#fin2_","#fin2_",
                "#fin2_","#fin2_","#fin2_",
                "#fin2_","#fin2_","#fin2_",
                "#fin2_","#fin2_","#fin2_",
                "#temp2","#temp2","#temp2",
                "#temp2","#temp2","#temp2",
                "#temp2","#temp2","#temp2",
                "#temp2","#temp2","#temp2")

#YearSampList, TimeperiodList and ProgList are lists used to read in the correct
#election variable used in each model.

YearSampList <- list("2004","2004","2004","2004","2004","2004",
                     "2000","2000","2000","2004","2004","2004",
                     "2004","2004","2004", 
                     "2000","2000","2000","2004","2004","2004",
                     "2004","2004","2004")


TimeperiodList <- list("2004to2013","2004to2013","2004to2013","2004to2013","2004to2013","2004to2013",
                       "2000to2010","2000to2010","2000to2010","2004to2013","2004to2013","2004to2013",
                       "2004to2013","2004to2013","2004to2013",
                       "2000to2010","2000to2010","2000to2010","2004to2013","2004to2013","2004to2013",
                       "2004to2013","2004to2013","2004to2013")

ProgList <- list("ZH","BF","PRONAF","ZH","BF","PRONAF",
                 "ZH","BF","PRONAF","ZH","BF","PRONAF",
                 "ZH","BF","PRONAF",
                 "ZH","BF","PRONAF","ZH","BF","PRONAF",
                 "ZH","BF","PRONAF")

#fitnamesave - this will be the same for ALL!

#List of folders where the outcomes (e.g. CBGPS weights) should be saved
#These are put in separate folder per programme-outcome, and named the same, so that the files can easily be read in again
#and used in another step
FolderSavelist <- list("ProgrammeOutcome/ZH/Kcal/",
                       "ProgrammeOutcome/BF/Kcal/",
                       "ProgrammeOutcome/PRONAF/Kcal/",
                       
                       "ProgrammeOutcome/ZH/Protein/",
                       "ProgrammeOutcome/BF/Protein/",
                       "ProgrammeOutcome/PRONAF/Protein/",
                       
                       "ProgrammeOutcome/ZH/MPI00to10/",
                       "ProgrammeOutcome/BF/MPI00to10/",
                       "ProgrammeOutcome/PRONAF/MPI00to10/",
                       
                       "ProgrammeOutcome/ZH/MPI04to13/",
                       "ProgrammeOutcome/BF/MPI04to13/",
                       "ProgrammeOutcome/PRONAF/MPI04to13/",
                       
                       "ProgrammeOutcome/ZH/Childmal/",
                       "ProgrammeOutcome/BF/Childmal/",
                       "ProgrammeOutcome/PRONAF/Childmal/",
                       
                       "ProgrammeOutcome/ZH/InfMort00to10/",
                       "ProgrammeOutcome/BF/InfMort00to10/",
                       "ProgrammeOutcome/PRONAF/InfMort00to10/",
                       
                       "ProgrammeOutcome/ZH/InfMort04to13/",
                       "ProgrammeOutcome/BF/InfMort04to13/",
                       "ProgrammeOutcome/PRONAF/InfMort04to13/",
                       
                       "ProgrammeOutcome/ZH/NatVeg/",
                       "ProgrammeOutcome/BF/NatVeg/",
                       "ProgrammeOutcome/PRONAF/NatVeg/")

allfit2names <- list("ZHandKcal04to13mpi00FULL",
                     "BFandKcal04to13mpi00FULL",
                     "PRONAFandKcal04to13mpi00FULL",
                     "ZHandProtein04to13mpi00FULL",
                     "BFandProtein04to13mpi00FULL",
                     "PRONAFandProtein04to13mpi00FULL",
                     
                     "ZHandMPI00to10onlineFULL",
                     "BFandMPI00to10onlineFULL",
                     "PRONAFandMPI00to10onlineFULL",
                     "ZHandMPI04FULL",
                     "BolsaFamiliaMPI04FULL",
                     "PronafandMPI04FULL",
                     
                     "ZHandChildMal04to13fullnewest",
                     "BFandChildMal04to13fullnew",
                     "PRONAFandChildMal04to13FULLnewest",
                     
                     "ZHandInfMort00to10FULL",
                     "BFandInfMort00to10FULL",
                     "PRONAFandInfMort00to10FULL",
                     
                     "ZHandInfMortI04to13FULLNEW",
                     "BFandInfMort04to13FULLNEW",
                     "PRONAFandInfMort04to13FULLNEW",
                     
                     "ZHandNatVeg04to13NEWfinalmpi00FUL",
                     "BFandNatVeg04to13NEWfinalmpi00FULL",
                     "PRONAFandNatVeg04to13NEWfinalmpi00FULL")

#Three different variations of biome are used, depending on the model,
#Biomefor2004Analysis is for models where municipality borders are adjusted to 2004 boundaries, i.e. 2004-2013 time-frame models
#Biomefor2000Analysis is for models where municipality borders are adjusted to 2000 boundaries, i.e. 2000-2010 time-frame models
#Biomefor2004AnalysisNoHybrid is used in Natural vegetation models, are adjusted to 2004 boundaris, and only have main biome categories (no transition ones)

biomeL <- list("Biomefor2004Analysis","Biomefor2004Analysis","Biomefor2004Analysis",
               "Biomefor2004Analysis","Biomefor2004Analysis","Biomefor2004Analysis",
               "Biomefor2000Analysis","Biomefor2000Analysis","Biomefor2000Analysis",
               "Biomefor2004Analysis","Biomefor2004Analysis","Biomefor2004Analysis",
               
               "Biomefor2004Analysis","Biomefor2004Analysis","Biomefor2004Analysis",
               "Biomefor2000Analysis","Biomefor2000Analysis","Biomefor2000Analysis",
               "Biomefor2004Analysis","Biomefor2004Analysis","Biomefor2004Analysis",
               
               "Biomefor2004AnalysisNoHybrid","Biomefor2004AnalysisNoHybrid","Biomefor2004AnalysisNoHybrid")

#The name of the created parametric CBGPS weights for each programme-outcome combination created using  GetCBGPSfit2 
#Note that models 13-21 (Child malnutrition, Infant mortality 00-10 and Infant mortality 4-13 are named slightly differently, i.e. with "_noInfl)
#These 9 CBGPS weights are created based on samples AFTER 
#running an initial CBGPS weighted regression model and then taking out influention points (based on Dfbeta) - se SI Appendix Text for explanation

fit2L <- list("fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda",
              "fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda",
              "fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda",
              "fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda",
              
              "fit2qualityElectionW_noInfl.rda","fit2qualityElectionW_noInfl.rda","fit2qualityElectionW_noInfl.rda",
              "fit2qualityElectionW_noInfl.rda","fit2qualityElectionW_noInfl.rda","fit2qualityElectionW_noInfl.rda",
              "fit2qualityElectionW_noInfl.rda","fit2qualityElectionW_noInfl.rda","fit2qualityElectionW_noInfl.rda",
              
              "fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda")


#The name of the created non-parametric CBGPS weights for each programme-outcome combination created using GetCBGPSfit3
#Note that models 13-21 (Child malnutrition, Infant mortality 00-10 and Infant mortality 4-13 are named slightly differently, i.e. with "_noInfl)
#These 9 CBGPS weights are created based on samples AFTER 
#Running an initial CBGPS weighted regression model and then taking out influention points (based on Dfbeta) - se SI Appendix Text for explanation

fit3L <- list("fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda",
              "fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda",
              "fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda",
              "fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda",
              
              "fit3qualityElectionW_noInfl.rda","fit3qualityElectionW_noInfl.rda","fit3qualityElectionW_noInfl.rda",
              "fit3qualityElectionW_noInfl.rda","fit3qualityElectionW_noInfl.rda","fit3qualityElectionW_noInfl.rda",
              "fit3qualityElectionW_noInfl.rda","fit3qualityElectionW_noInfl.rda","fit3qualityElectionW_noInfl.rda",
              
              "fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda")

#allListsEnd

################################################################################################

#Lists which only include the 9 programme-outcome combinations for Child malnutrition, Infant mortality 00-10 and Infant mortality 04-13

#These lists are used separately to run non-robust CBGPS weighted regressions, identify and exclude influential points (based on Dfbetas)
#and run final CBGPS weights and robust multivariate regression models

#(For the other 15 programme-outcome combinations robust regressions using the robustbase package is used thus this step is not nescessary)


#QPNBListsStart

#A list of directory where the script for each model lie
#If all scripts lie in the same folder, repeat 24 times (one per programme-outcome combination)
Folderlist <- list("ProgrammeOutcome/ZH/Childmal/",
                   "ProgrammeOutcome/BF/Childmal/",
                   "ProgrammeOutcome/PRONAF/Childmal/",
                   
                   "ProgrammeOutcome/ZH/InfMort00to10/",
                   "ProgrammeOutcome/BF/InfMort00to10/",
                   "ProgrammeOutcome/PRONAF/InfMort00to10/",
                   
                   "ProgrammeOutcome/ZH/InfMort04to13/",
                   "ProgrammeOutcome/BF/InfMort04to13/",
                   "ProgrammeOutcome/PRONAF/InfMort04to13/")

#List of script for each programme-outcome combination - these scripts subset the appropriate variables and rows for each model
Scriptlist <- list("13.ZHandChildmal_robustness.R",
                   "14.BFandChildmal_robustness.R",
                   "15.PRONAFandChildmal_robustness.R",
                   "16.ZHandInfmort00to10_robustness.R",
                   "17.BFandInfmort00to10_robustness.R",
                   "18.PRONAFandInfmort00to10_robustness.R",
                   "19.ZHandInfmort04to13_robustness.R",
                   "20.BFandInfmort04to13_robustness.R",
                   "21.PRONAFandInfmort04to13_robustness.R")

#Bookmarks in scripts to indicate rows from where to start to run script
startList <- list("#temp1","#temp1","#temp1",
                  "#temp1","#temp1","#temp1",
                  "#temp1","#temp1","#temp1")

#Bookmarks in scripts to indicate rows from where to stop to run script
endList <- list("#temp2","#temp2","#temp2",
                "#temp2","#temp2","#temp2",
                "#temp2","#temp2","#temp2")

#YearSampList, TimeperiodList and ProgList are lists used to read in the correct
#election variable used in each model.

YearSampList <- list("2004","2004","2004", 
                     "2000","2000","2000","2004","2004","2004")

TimeperiodList <- list("2004to2013","2004to2013","2004to2013",
                       "2000to2010","2000to2010","2000to2010","2004to2013","2004to2013","2004to2013")

ProgList <- list("ZH","BF","PRONAF",
                 "ZH","BF","PRONAF","ZH","BF","PRONAF")

#fitnamesave - this will be the same for ALL!

#List of folders where the outcomes (e.g. CBGPS weights) should be saved
#These are put in separate folder per programme-outcome, and named the same, so that the files can easily be read in again
#and used in another step

FolderSavelist <- list("ProgrammeOutcome/ZH/Childmal/",
                       "ProgrammeOutcome/BF/Childmal/",
                       "ProgrammeOutcome/PRONAF/Childmal/",
                       
                       "ProgrammeOutcome/ZH/InfMort00to10/",
                       "ProgrammeOutcome/BF/InfMort00to10/",
                       "ProgrammeOutcome/PRONAF/InfMort00to10/",
                       
                       "ProgrammeOutcome/ZH/InfMort04to13/",
                       "ProgrammeOutcome/BF/InfMort04to13/",
                       "ProgrammeOutcome/PRONAF/InfMort04to13/")

allfit2names <- list("ZHandChildMal04to13fullnewest",
                     "BFandChildMal04to13fullnew",
                     "PRONAFandChildMal04to13FULLnewest",
                     
                     "ZHandInfMort00to10FULL",
                     "BFandInfMort00to10FULL",
                     "PRONAFandInfMort00to10FULL",
                     
                     "ZHandInfMortI04to13FULLNEW",
                     "BFandInfMort04to13FULLNEW",
                     "PRONAFandInfMort04to13FULLNEW")

#The name of the created parametric CBGPS weights for each programme-outcome combination created using  GetCBGPSfit2 
fit2L <- list("fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda",
              "fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda",
              "fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda","fit2fullqualityElectionW.rda")

#The name of the created non-parametric CBGPS weights for each programme-outcome combination created using GetCBGPSfit3
fit3L <- list("fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda",
              "fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda",
              "fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda","fit3fullqualityElectionW.rda")


#Two different variations of biome are used, depending on the model,
#Biomefor2004Analysis is for models where municipality borders are adjusted to 2004 boundaries, i.e. 2004-2013 time-frame models
#Biomefor2000Analysis is for models where municipality borders are adjusted to 2000 boundaries, i.e. 2000-2010 time-frame models
biomeL <- list("Biomefor2004Analysis","Biomefor2004Analysis","Biomefor2004Analysis",
               "Biomefor2000Analysis","Biomefor2000Analysis","Biomefor2000Analysis",
               "Biomefor2004Analysis","Biomefor2004Analysis","Biomefor2004Analysis")

#QPNBListsEnd

#######################################################################################################
#######################################################################################################

#Check that sourcing all scripts work (function just runs through all programme-outcome scripts and returns sample information)
sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#allListsStart",endTag="#allListsEnd")
test <- mapply(function(x,y,z,w,b,c,d,e,f) tempfunc(x,y,z,w,b,c,d,e,f,"fit2fullqualityElectionW"),Folderlist,Scriptlist,startList,endList,
                         YearSampList,TimeperiodList,ProgList,biomeL,FolderSavelist,SIMPLIFY = FALSE)
testdf <- data.frame(do.call("rbind",test))

#######################################################################################################
#######################################################################################################

#1. Create cbgps weights 

#######################################################################################################
#######################################################################################################

#1.1 Use function GetCBGPSfit2 and GetCBGPSfit3 on all programme-outcome samples

#GetCBGPSfit2 reads in the correct sample for an outcome variable and creates parametric CBGPS weights
#GetCBGPSfit3 reads in the correct sample for an outcome varible and creates non-parametric CBGPS weights

#GetCBGPSfit2 and GetCBGPSfit3 are used in combination with mapply so that weights for all outcome variables
#are created in one go and saved in their respective folder

#To create one CBGPS weight at a time
checkone <- GetCBGPSfit2("ProgrammeOutcome/ZH/Kcal/","1.ZHandKcal_robustness.R",
                         "#start2_","#fin2_",yearsample="2004",timeperiod="2004to2013",progr="ZH","Biomefor2004Analysis",
                         "ProgrammeOutcome/ZH/Kcal/","TestFin2020quality")

#To get summary information for CBGPS weights
fullmod <- loadRData(paste("ProgrammeOutcome/ZH/Kcal/","TestFin2020quality.rda",sep=""))#file
CBPStabletest <- CBPStable(fullmod,"ZHandKcal04to13mpi00FULL")

##################################################################################################

#For all core models - parametric CBGPS
sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#allListsStart",endTag="#allListsEnd")

#GetCBGPSfit2 creates parametric CBGPS weights AND saves them in their respective programme-outcome folder
#the outcome is a dataframe with information about sample size and reference state/biomes (the category with the least observations)
allModFullFit2 <- mapply(function(x,y,z,w,b,c,d,e,f) GetCBGPSfit2(x,y,z,w,b,c,d,e,f,"fit2fullqualityElectionW"),Folderlist,Scriptlist,startList,endList,
                         YearSampList,TimeperiodList,ProgList,biomeL,FolderSavelist,SIMPLIFY = FALSE)

#The outcome can be gathered in a dataframe and saved as csv (useful to check sample sizes and reference states/biomes used)
allModFullFit2DF <- data.frame(do.call("rbind",allModFullFit2))
write.csv(allModFullFit2DF, file ="Results/allfit2ModsQuality_Full_W_election.csv")

#################################################################################################################

#For all core models - non-parametric CBGPS (obs one CBGPS weights can take up to one hour to create)

#OBS need to source the lists again
sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#allListsStart",endTag="#allListsEnd")

#GetCBGPSfit3 creates non-parametric CBGPS weights AND saves them in their respective programme-outcome folder
allModFullFit3 <- mapply(function(x,y,z,w,b,c,d,e,f) GetCBGPSfit3(x,y,z,w,b,c,d,e,f,"fit3fullqualityElectionW"),Folderlist,Scriptlist,startList,endList,
                         YearSampList,TimeperiodList,ProgList,biomeL,FolderSavelist,SIMPLIFY = FALSE)

#The oucome - sample sizes and reference biome/states - can be saved as a csv for reference
allModFullFit3DF <- data.frame(do.call("rbind",allModFullFit3))
write.csv(allModFullFit3DF, file ="Results/allfit3ModsQuality_Full_W_election.csv")

##################################################################################################################

#2. Compare fit2 and fit3

sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#allListsStart",endTag="#allListsEnd")

#getAllCBPStable gets out key summary information for the CBGPS weights
allFit2ResQPNB <- mapply(function(x,y) getAllCBPStable(x,"fit2fullqualityElectionW.rda",y),FolderSavelist,allfit2names,SIMPLIFY = FALSE)
allFit3ResQPNB <- mapply(function(x,y) getAllCBPStable(x,"fit3fullqualityElectionW.rda",y),FolderSavelist,allfit2names,SIMPLIFY = FALSE)


#FitCompFunc combines fit2 and fit3 summary information so its simple to compare which is best (has lowest correlation)
Tog <- FitCompFunc(allFit2ResQPNB,allFit3ResQPNB)
write.csv(Tog, file ="Results/AllQualityFull_Fit2Fit3comp.csv")

#Use Tog to look at each programme-outcome and decide whether parametric (fit2) or non-parametric (fit3) is best

#####################################################################################################################
#####################################################################################################################

#For programme-outcome 13-21 (Child malnutrition, Infant mortality 00-10 and Infant mortality 04-13)
#Run an initial simple model, and model with state interaction, and decide which model is best
#if the state interaction model has a significant interaction (assessed running an Anova) and a lower AIC this will be selected
#if not, the simpler model will be selected as the best model.
#Then based on that model, influential points (based on Dfbetas) will be identified and excluded, and final CBGPS weights created
#that will be used to run the robust model

##################################################################################################################

#Use SampleFunc to run CBGPS weighted multivariate regression models for each programme-outcome combination
#specified in the sourced lists

#IF want to run one programme-outcome at a time
onemodL <- SampleFunc("ProgrammeOutcome/ZH/Childmal/","13.ZHandChildmal_robustness.R",
                      "#temp1","#temp2","2004","2004to2013","ZH","Biomefor2004Analysis","ProgrammeOutcome/ZH/Childmal/",
                      "fit2fullqualityElectionW.rda","fit3fullqualityElectionW.rda",FullModListRedElectionScale,QPmodFuncfit2,
                      "checkmod1quality")

#Source the 9 lists
sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#QPNBListsStart",endTag="#QPNBListsEnd")

#ModFuncL - a new argument needed that says whether its the parametric (fit2) or non-parametric (fit3) that should be selected
#when the multivariate models are run
ModFuncL <- list(QPmodFuncfit2,QPmodFuncfit3,QPmodFuncfit2,
                 NBmodFuncfit3,NBmodFuncfit2,NBmodFuncfit2,
                 QPmodFuncfit2,QPmodFuncfit2,QPmodFuncfit2)

allModsFull <- mapply(function(x,y,z,w,a,b,c,d,e,f) SampleFunc(x,y,"#temp1","#temp2",z,w,a,b,c,d,e,FullModListRedElectionScale,f,"AllQualityMods_W_Election_Full"),
                      Folderlist,Scriptlist,YearSampList,TimeperiodList,ProgList,biomeL,FolderSavelist,fit2L,fit3L,ModFuncL,SIMPLIFY=FALSE)

######################################################################################################################################

#For each of the 9 mods I compare whether linear or state linear is the best model
sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#QPNBListsStart",endTag="#QPNBListsEnd")

#In the Quasi-Poisoon model list the Poisson models come first (these are only there to create quasi-AIC)
#The models to focus on in these lists are model 4 (normal) and 6 (with programme-state interaction)
#for Negative Binomial models there are just 3 models, 1 (normal) and 3 (with programme-state interaction)

#Lists that decide which models to compare
linNrL <- list(4,4,4,
               1,1,1,
               4,4,4)
statelinNrL <- list(6,6,6,
                    3,3,3,
                    6,6,6)

#Specify which function to use to create AIC 
#either qAICfunc for Quasi-Poisson models, or r2AICfunc for Negative Binomial (and OLS) models
aicfuncL <- list(qAICfunc,qAICfunc,qAICfunc,
                 r2AICfunc,r2AICfunc,r2AICfunc,
                 qAICfunc,qAICfunc,qAICfunc)

#Make comparison for all 9 models at once

#CompModFuncFin gets out coefficient, SE, pvalue and a best fit value
#best fit value for Robust OLS = adjusted r2, for Negative Binomial its AIC, and Quasi-Poisson its quasi-AIC
#these are gathered in columns modr2Linear, modr2StateLinear and adjR2change

#Error messages will appear for each mode, this is because the model starts by trying to extract an adjusted r2 value (which it cannot)
#then it moves on to extract AIC and when it cannot do that it tries quasi-AIC

allModsFullcomp <- mapply(function(x,y,z,w,a,b) CompModFuncFin(x,y,"#temp1","#temp2",z,"AllQualityMods_W_Election_Full.rda",w,a,b,":stateName*"),
                          Folderlist,Scriptlist,FolderSavelist,linNrL,statelinNrL,aicfuncL,SIMPLIFY=FALSE)
allModsFullcompDF <- data.frame(do.call("rbind",allModsFullcomp))

#Write as a csv
write.csv(allModsFullcompDF, file ="Results/AllQualityQPNB_W_Election_Full_linstatelincomp.csv")

#allModsFullcompDF is used to see which model is the best model, either with or without a state interaction
#that model will be the model used to identify and exclude influential points

#######################################################################################################
#######################################################################################################

#Create new CBGPS weights, on the sample where influential points have been excluded
#Use a new CBGPS function - GetCBGPSfit2new - which takes an existing model, identifies influential points and exclude these and THEN
#creates CBGPS weights based on the reduced sample

#######################################################################################################
#######################################################################################################

#Parametric CBGPS (fit2) without influential points

#Call the lists for the 9 programme-outcome combinations
sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#QPNBListsStart",endTag="#QPNBListsEnd")

#New argument: the final models that makes the basis for influential points and the final df
linNrL <- list(4,4,4,
               1,1,1,
               4,4,4)

#AllCoreMods_W_Election_Full.rda are the core models used to find the influential points
#fit2coreElectionW_noInfl is the NEW CBGPS, i.e. the one without influential points
allModFullFit2 <- mapply(function(x,y,z,w,b,c,d,e,f,g) GetCBGPSfit2new(x,y,z,w,b,c,d,sqrtFunc,e,"AllQualityMods_W_Election_Full.rda",f,g,
                                                                       "fit2qualityElectionW_noInfl"),
                         Folderlist,Scriptlist,startList,endList,
                         YearSampList,TimeperiodList,ProgList,FolderSavelist,linNrL,
                         biomeL,SIMPLIFY = FALSE)

#Save the log output
allModFullFit2DF <- data.frame(do.call("rbind",allModFullFit2))
write.csv(allModFullFit2DF, file ="Results/allfit2ModsQuality_W_election_NoInfl.csv")

#################################################################################################################################

#Non-Parametric CBGPS (fit3) without influential points

#Call the lists for the 9 programme-outcome combinations
sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#QPNBListsStart",endTag="#QPNBListsEnd")

#New argument: the final models that makes the basis for influential points and the final df
#Select interaction model only when AIC/quasi-AIC is lower and interaction is significant
linNrL <- list(4,4,4,
               1,1,1,
               4,4,4)

#Run all the weights
allModFullFit3 <- mapply(function(x,y,z,w,b,c,d,e,f,g) GetCBGPSfit3new(x,y,z,w,b,c,d,sqrtFunc,e,"AllQualityMods_W_Election_Full.rda",f,g,
                                                                       "fit3qualityElectionW_noInfl"),
                         Folderlist,Scriptlist,startList,endList,
                         YearSampList,TimeperiodList,ProgList,FolderSavelist,linNrL,
                         biomeL,SIMPLIFY = FALSE)

#Save the log
allModFullFit3DF <- data.frame(do.call("rbind",allModFullFit3))
write.csv(allModFullFit3DF, file ="Output/Graphs/Analysis/allfit3ModsQuality_W_election_NoInfl.csv")

#################################################################################################################################
#################################################################################################################################

#Compare parametric and non-parametric CBGPS for all (now the 9 weights in the middle are the ones without influential points)

sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#allListsStart",endTag="#allListsEnd")

allFit2ResQPNB <- mapply(function(x,y,z) getAllCBPStable(x,y,z),FolderSavelist,fit2L,allfit2names,SIMPLIFY = FALSE)
allFit3ResQPNB <- mapply(function(x,y,z) getAllCBPStable(x,y,z),FolderSavelist,fit3L,allfit2names,SIMPLIFY = FALSE)

Tog <- FitCompFunc(allFit2ResQPNB,allFit3ResQPNB)
write.csv(Tog, file ="Results/AllQuality_Fit2Fit3compNoInf.csv")

#################################################################################################################################
#################################################################################################################################

#HERE IS THE FINAL RUN FOR ALL ROBUSTNESS ROBUST MODELS

#Use SampleFuncNewReduced if running one model manually
#Use SampleFuncNew together with mapply to run all sequentially 

#################################################################################################################################
#################################################################################################################################

#If want to run one model one at a time can use SampleFuncNewReduced to get the correct sample [[1]] and model formula [[2]]
#If want to run all models at once use SampleFuncNew and mapply further down

#################################################################################################################################

#Example for Robust OLS (ZH-Kcalorie model)
FullMod <- SampleFuncNewReduced("ProgrammeOutcome/ZH/Kcal/","1.ZHandKcal_robustness.R",
                                "#start2_","#fin2_","2004","2004to2013","ZH",sqrtFuncEmpty,
                                "ProgrammeOutcome/ZH/Kcal/","AllQualityMods_W_Election_Full.rda",NA,
                                "Biomefor2004Analysis","fit2fullqualityElectionW.rda","fit3fullqualityElectionW.rda",
                                FullModListRedElectionScale)

RuralPoverty04quality <- FullMod[[1]]#df
FullMod <- FullMod[[2]]#formula

#Running simpler model (LinMod) and model with state interaction (StateMod)
LinMod <- lmrob(FullMod[[1]], weights = fit2$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000)
NoZHMod <- lmrob(FullMod[[2]], weights = fit2$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000)
StateMod <- lmrob(FullMod[[3]], weights = fit2$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000)

#Save the 3 in a list (same as get when using SampleFuncNew). If name the same will then and override the old version
AllMods <- list(LinMod,NoZHMod,StateMod)
save(AllMods, file = paste("ProgrammeOutcome/BF/Kcal/","AllQualityMods_W_Election_NoInfl",sep="",paste(".rda",sep="")))

#############################################################################################

#Example for Quasi-Poisson model
FullMod <- SampleFuncNewReduced("ProgrammeOutcome/ZH/Childmal/","13.ZHandChildmal_robustness.R",
                                "#start2_","#fin2_","2004","2004to2013","ZH",sqrtFunc,
                                "ProgrammeOutcome/ZH/Childmal/","AllQualityMods_W_Election_Full.rda",4,
                                "Biomefor2004Analysis","fit2qualityElectionW_noInfl.rda","fit2qualityElectionW_noInfl.rda",
                                FullModListRedElectionScale)

RuralPoverty04quality <- FullMod[[1]]#df
FullMod <- FullMod[[2]]#formula

#1. First need to run the Poisson models

#1.
my.formula <- FullMod[[1]]
Poisson <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))

#2. 
my.formula <- FullMod[[2]]
nullPoisson <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))

#3.
my.formula <- FullMod[[3]]
Poissonstate <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))


#2. Then run the Quasi Poisson models

#4.
my.formula <- FullMod[[1]]
QuasiPoisson <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))

#5.
my.formula <- FullMod[[2]]
nullQuasiPoisson <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))

#6. 
my.formula <- FullMod[[3]]
QuasiPoissonstate <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))

#Save the 6 in a list (same as get when using SampleFuncNew). If name the same will then and override the old version
AllMods <- list(Poisson,nullPoisson,Poissonstate,QuasiPoisson,nullQuasiPoisson,QuasiPoissonstate)
save(AllMods, file = paste("ProgrammeOutcome/ZH/Childmal/","AllQualityMods_W_Election_NoInfl",sep="",paste(".rda",sep="")))


###############################################################################################
###############################################################################################

#Running all models at once

sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#allListsStart",endTag="#allListsEnd")

#Argument to select whether parametric (fit2) or non-parametric (fit3) will be used as CBGPS weights
ModFuncL <- list(ROBCBPSmodFuncfit2,ROBCBPSmodFuncfit3,ROBCBPSmodFuncfit2,
                 ROBCBPSmodFuncfit2,ROBCBPSmodFuncfit2,ROBCBPSmodFuncfit2,
                 ROBCBPSmodFuncfit2,ROBCBPSmodFuncfit2,ROBCBPSmodFuncfit2,
                 ROBCBPSmodFuncfit2,ROBCBPSmodFuncfit2,ROBCBPSmodFuncfit2,
                 
                 QPmodFuncfit2,QPmodFuncfit3,QPmodFuncfit2,
                 NBmodFuncfit2,NBmodFuncfit2,NBmodFuncfit2,
                 QPmodFuncfit3,QPmodFuncfit3,QPmodFuncfit3,
                 
                 ROBCBPSmodFuncfit2,ROBCBPSmodFuncfit3,ROBCBPSmodFuncfit2)

#Select whether need to exclude influential points (sqrtFunc) or not (sqrtFuncEmpty) for the model
sqrtL <- list(sqrtFuncEmpty,sqrtFuncEmpty,sqrtFuncEmpty,
              sqrtFuncEmpty,sqrtFuncEmpty,sqrtFuncEmpty,
              sqrtFuncEmpty,sqrtFuncEmpty,sqrtFuncEmpty,
              sqrtFuncEmpty,sqrtFuncEmpty,sqrtFuncEmpty,
              
              sqrtFunc,sqrtFunc,sqrtFunc,
              sqrtFunc,sqrtFunc,sqrtFunc,
              sqrtFunc,sqrtFunc,sqrtFunc,
              
              sqrtFuncEmpty,sqrtFuncEmpty,sqrtFuncEmpty)

#Only need number and model for the QP/NB ones - these are the ones that the influential points are removed from
#only pronaf-childmal was state linear in the non-robust model
nrL <- list(NA,NA,NA,
            NA,NA,NA,
            NA,NA,NA,
            NA,NA,NA,
            
            4,4,4,
            1,1,1,
            4,6,4,
            
            NA,NA,NA)

#Run all - the model function is SampleFuncNew - includes arguments to exclude infuential points
allModsFull <- mapply(function(x,y,p,q,z,w,a,s,b,c,d,e,f,g) SampleFuncNew(x,y,p,q,z,w,a,s,b,"AllQualityMods_W_Election_Full.rda",c,
                                                                          d,e,f,FullModListRedElectionScale,g,"AllQualityMods_W_Election_NoInfl"),
                      Folderlist,Scriptlist,startList,endList,YearSampList,TimeperiodList,ProgList,sqrtL,
                      FolderSavelist,nrL,biomeL,fit2L,fit3L,ModFuncL,SIMPLIFY=FALSE)

#if any models come with a warning/non-convergence in the robust OLS models (1-12 and 22-24) run again and should work ok
#robust OLS models go through an iterative process when creating the robustness weights, i.e. it first selects a random sample of points and
#then calculates robustness weights based on these. Because of this, warning/non-convergence in coefficients 
#can on occacion occur in the more complex state-interaction models (otherwise model results are only negligibly different when repeating), 
#but then just run again

##########################################################################################################
#########################################################################################################

#Compare between linear and state linear model for all

#9999
sourcePartial("ZH_CBGPSandRegression_robustness.R",startTag="#allListsStart",endTag="#allListsEnd")

linNrL <- list(1,1,1,
               1,1,1,
               1,1,1,
               1,1,1,
               
               4,4,4,
               1,1,1,
               4,4,4,
               
               1,1,1)

statelinNrL <- list(3,3,3,
                    3,3,3,
                    3,3,3,
                    3,3,3,
                    
                    6,6,6,
                    3,3,3,
                    6,6,6,
                    
                    3,3,3)

#specify whether use the function for qAIC (for QP models) or r2AIC which is used for all robust and NB models
aicfuncL <- list(r2AICfunc,r2AICfunc,r2AICfunc,
                 r2AICfunc,r2AICfunc,r2AICfunc,
                 r2AICfunc,r2AICfunc,r2AICfunc,
                 r2AICfunc,r2AICfunc,r2AICfunc,
                 
                 qAICfunc,qAICfunc,qAICfunc,
                 r2AICfunc,r2AICfunc,r2AICfunc,
                 qAICfunc,qAICfunc,qAICfunc,
                 
                 r2AICfunc,r2AICfunc,r2AICfunc)

intvarL <- list(":stateName*",":stateName*",":stateName*",
                ":stateName*",":stateName*",":stateName*",
                ":stateName*",":stateName*",":stateName*",
                ":stateName*",":stateName*",":stateName*",
                
                ":stateName*",":stateName*",":stateName*",
                ":stateName*",":stateName*",":stateName*",
                ":stateName*",":stateName*",":stateName*",
                
                ":Biomefor2004AnalysisNoHybrid*",":Biomefor2004AnalysisNoHybrid*",":Biomefor2004AnalysisNoHybrid*")


allModsFullcomp <- mapply(function(x,y,p,q,z,w,a,b,c) CompModFuncFin(x,y,p,q,z,"AllQualityMods_W_Election_NoInfl.rda",w,a,b,c),
                          Folderlist,Scriptlist,startList,endList,FolderSavelist,linNrL,statelinNrL,aicfuncL,intvarL,SIMPLIFY=FALSE)
allModsFullcompDF <- data.frame(do.call("rbind",allModsFullcomp))

write.csv(allModsFullcompDF, file ="Results/AllQuality_W_Election_NoInfl_linstatelincomp.csv")

##########################################################################################################
##########################################################################################################

